library(dplyr)
library(data.table)
library(conflicted)
conflict_prefer("mutate", "dplyr")
# Working directory for the Afghanistan STEPS microdata; modify according to
# your PC and the STEPS survey.
# NOTE(review): this path contains a "Data/" level that the other country
# sections in this script do not use -- confirm it is intended.
setwd("~/Desktop/Artículos/STEPS/Data/STEPS microdata/Afghanistan")


### READ DATA FILE
data <- read.csv("afg2018.csv") # modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extract from the raw columns. with() resolves the bare
# column names inside `data` for this one call only, so nothing is left on the
# search path if the call fails part-way (attach()/detach() could not
# guarantee that, and a stale attached dataset could leak into later sections).
df <- with(data, data.frame(
  # Survey-level constants (length-one values are recycled to every row)
  study_id = "AFG_2018_STEPS_v01", # replace with STEPS id
  country = "Afghanistan", # full country name, first letter capital
  region = "Eastern Mediterranean", # full WHO region name
  data_year = 2018, # year of data collection (if two, use the latest)
  coverage = "National", # "National" or "Subnational"

  # Sampling design variables and step weights
  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep3,

  survey_min_age = 18, # youngest age target
  survey_max_age = 69, # oldest age target

  # Participant-level demographics
  participant_id = pid, # if not available use 1:nrow(data)
  sex = sex,
  age = age, # years
  is_urban = urbanrural, # recoded later to urban = 1, rural = 0
  is_pregnant = m8,

  # Anthropometry
  weight = m12, # weight variable in kg
  height = m11, # height variable in cm
  waist = m14,

  # Blood pressure readings and hypertension questions
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,

  # Diabetes questions
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8,

  # Spot urine
  u_creatinine = b15, # spot urinary creatinine variable
  u_sodium = b14 # spot urinary sodium variable in mmol/l
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Always verify that men = 1 and women = 2.
# The label "Men" becomes 1; every other non-missing value becomes 2.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Always verify the units and change if needed; height is taken as cm here,
# so convert it to metres.
df <- df %>% mutate(height = height / 100)

table(df$is_pregnant)

# Always verify that is_pregnant is coded 0 = no, 1 = pregnant.
# Blank every cell of df that equals "Inf", then collapse is_pregnant to 1/0
# and force it to 0 for men (sex == 1).
df[df == "Inf"] <- NA
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )

table(df$is_pregnant, df$sex)

# Urban/rural: the value "u" becomes 1, any other non-missing value 0.
table(df$is_urban)
df$is_urban <- ifelse(df$is_urban == "u", 1, 0)
table(df$is_urban)


### ANTHROPOMETRIC VARIABLES
summary(df)
# Recode abnormal values (non-positive, "888"/"999" codes) to NA.
# Height is already in metres, so its upper cut-off is 5 (this also covers the
# scaled 8.88 / 9.99 codes); waist and weight use 888 (which also covers 999).
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )
summary(df)

## BP

table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1/0, then set self_hyper to 0 for participants
# whose bp_measured is 0.
df <- df %>%
  mutate(
    self_hyper = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same pattern: 1/0, then 0 whenever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Readings of 888 or above are treated as missing codes.
df <- df %>%
  mutate(across(
    c(sbp1, sbp2, sbp3, dbp1, dbp2, dbp3),
    function(reading) ifelse(reading >= 888, NA, reading)
  ))

summary(df)

table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Diabetes questions follow the same 1/0 and gating pattern as hypertension.
df <- df %>%
  mutate(
    self_diabetes = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


### URINARY VARIABLES
# Always verify that creatinine is in the mmol range (generally < 50); the
# factor below converts from mg/dl (100-400 range).
summary(df$u_creatinine)
df <- df %>% mutate(u_creatinine = u_creatinine * 0.08842)

summary(df$u_sodium)


### FINAL INSPECTION
# View() opens a data viewer and may fail outside an interactive session;
# skip it in batch runs so the export below is still reached.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction builds a data frame with 34 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/Extracted") # modify according to your PC
# Output file name: country full name and year.
write.csv(df, "Afghanistan_2018.csv", row.names = FALSE)

# Remove both objects before the next country section runs.
rm(data, df)





##ARMENIA
# Working directory for the Armenia STEPS microdata; modify according to your
# PC and the STEPS survey.
setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Armenia")


### READ DATA FILE
data <- read.csv("arm2016.csv") # modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extract from the raw columns. with() resolves the bare
# column names inside `data` for this one call only, so nothing is left on the
# search path if the call fails part-way (attach()/detach() could not
# guarantee that, and a stale attached dataset could leak into later sections).
df <- with(data, data.frame(
  # Survey-level constants (length-one values are recycled to every row)
  study_id = "ARM_2016_STEPS_v01", # replace with STEPS id
  country = "Armenia", # full country name, first letter capital
  region = "Europe", # full WHO region name
  data_year = 2016, # year of data collection (if two, use the latest)
  coverage = "National", # "National" or "Subnational"

  # Sampling design variables and step weights
  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep3,

  survey_min_age = 18, # youngest age target
  survey_max_age = 69, # oldest age target

  # Participant-level demographics
  participant_id = pid, # if not available use 1:nrow(data)
  sex = sex,
  age = age, # years
  is_urban = stratum, # stratum doubles as the urban/rural source; recoded later
  is_pregnant = m8,

  # Anthropometry
  weight = m12, # weight variable in kg
  height = m11, # height variable in cm
  waist = m14,

  # Blood pressure readings and hypertension questions
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,

  # Diabetes questions
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8,

  # Spot urine
  u_creatinine = b15, # spot urinary creatinine variable in mmol/l
  u_sodium = b14 # spot urinary sodium variable in mmol/l
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Always verify that men = 1 and women = 2.
# The label "Men" becomes 1; every other non-missing value becomes 2.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Always verify the units and change if needed; height is taken as cm here,
# so convert it to metres.
df <- df %>% mutate(height = height / 100)

table(df$is_pregnant)
# Always verify that is_pregnant is coded 0 = no, 1 = pregnant.
# Blank every cell of df that equals "Inf" (inf means missing), then collapse
# is_pregnant to 1/0 and force it to 0 for men (sex == 1).
df[df == "Inf"] <- NA
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

# Recode urban = 1, rural = 0 if necessary: the value 1 stays 1, any other
# non-missing value becomes 0.
# NOTE(review): is_urban was filled from stratum -- confirm that code 1 is urban.
table(df$is_urban)
df$is_urban <- ifelse(df$is_urban == 1, 1, 0)
table(df$is_urban)

### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal values (non-positive, "888"/"999" codes) to NA.
# Height is already in metres, so its upper cut-off is 5 (this also covers the
# scaled 8.88 / 9.99 codes); waist and weight use 888 (which also covers 999).
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1/0, then set self_hyper to 0 for participants
# whose bp_measured is 0.
df <- df %>%
  mutate(
    self_hyper = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same pattern: 1/0, then 0 whenever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Readings of 888 or above are treated as missing codes.
df <- df %>%
  mutate(across(
    c(sbp1, sbp2, sbp3, dbp1, dbp2, dbp3),
    function(reading) ifelse(reading >= 888, NA, reading)
  ))


table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Diabetes questions follow the same 1/0 and gating pattern as hypertension.
df <- df %>%
  mutate(
    self_diabetes = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


## URINARY VARIABLES

# Always verify that creatinine is in the mmol range (generally < 50); enable
# the conversion below if it is in mg/dl (100-400 range).
summary(df$u_creatinine)

# df$u_creatinine <- df$u_creatinine * 0.08842

summary(df$u_sodium)


### FINAL INSPECTION
# View() opens a data viewer and may fail outside an interactive session;
# skip it in batch runs so the export below is still reached.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction builds a data frame with 34 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/Extracted") # modify according to your PC
# Output file name: country full name and year.
write.csv(df, "Armenia_2016.csv", row.names = FALSE)

# Remove both objects before the next country section runs.
rm(data, df)




# Working directory for the Azerbaijan STEPS microdata; modify according to
# your PC and the STEPS survey.
setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Azerbaijan")


### READ DATA FILE
data <- read.csv("aze2017.csv") # modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extract from the raw columns. with() resolves the bare
# column names inside `data` for this one call only, so nothing is left on the
# search path if the call fails part-way (attach()/detach() could not
# guarantee that, and a stale attached dataset could leak into later sections).
df <- with(data, data.frame(
  # Survey-level constants (length-one values are recycled to every row)
  study_id = "AZE_2017_STEPS_v01", # replace with STEPS id
  country = "Azerbaijan", # full country name, first letter capital
  region = "Europe", # full WHO region name
  data_year = 2017, # year of data collection (if two, use the latest)
  coverage = "National", # "National" or "Subnational"

  # Sampling design variables and step weights
  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep3,

  survey_min_age = 18, # youngest age target
  survey_max_age = 69, # oldest age target

  # Participant-level demographics
  participant_id = pid, # if not available use 1:nrow(data)
  sex = sex,
  age = age, # years
  is_urban = stratum, # stratum doubles as the urban/rural source; recoded later
  is_pregnant = m8,

  # Anthropometry
  weight = m12, # weight variable in kg
  height = m11, # height variable in cm
  waist = m14,

  # Blood pressure readings and hypertension questions
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,

  # Diabetes questions
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8,

  # Spot urine
  u_creatinine = b15, # spot urinary creatinine variable in mmol/l
  u_sodium = b14 # spot urinary sodium variable in mmol/l
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Always verify that men = 1 and women = 2.
# The label "Men" becomes 1; every other non-missing value becomes 2.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Always verify the units and change if needed; height is taken as cm here,
# so convert it to metres.
df <- df %>% mutate(height = height / 100)

table(df$is_pregnant)
# Always verify that is_pregnant is coded 0 = no, 1 = pregnant.
# Blank every cell of df that equals "Inf" (inf means missing), then collapse
# is_pregnant to 1/0 and force it to 0 for men (sex == 1).
df[df == "Inf"] <- NA
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

# Recode urban = 1, rural = 0 if necessary: the value 1 stays 1, any other
# non-missing value becomes 0.
# NOTE(review): is_urban was filled from stratum -- confirm that code 1 is urban.
table(df$is_urban)
df$is_urban <- ifelse(df$is_urban == 1, 1, 0)
table(df$is_urban)


### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal values (non-positive, "888"/"999" codes) to NA.
# Height is already in metres, so its upper cut-off is 5 (this also covers the
# scaled 8.88 / 9.99 codes); waist and weight use 888 (which also covers 999).
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1/0, then set self_hyper to 0 for participants
# whose bp_measured is 0.
df <- df %>%
  mutate(
    self_hyper = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same pattern: 1/0, then 0 whenever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )


# Readings of 888 or above are treated as missing codes.
df <- df %>%
  mutate(across(
    c(sbp1, sbp2, sbp3, dbp1, dbp2, dbp3),
    function(reading) ifelse(reading >= 888, NA, reading)
  ))


table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Diabetes questions follow the same 1/0 and gating pattern as hypertension.
df <- df %>%
  mutate(
    self_diabetes = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )

## URINARY VARIABLES

# Always verify that creatinine is in the mmol range (generally < 50); enable
# the conversion below if it is in mg/dl (100-400 range).
summary(df$u_creatinine)

# df$u_creatinine <- df$u_creatinine * 0.08842


### FINAL INSPECTION
# View() opens a data viewer and may fail outside an interactive session;
# skip it in batch runs so the export below is still reached.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction builds a data frame with 34 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/Extracted") # modify according to your PC
# Output file name: country full name and year.
write.csv(df, "Azerbaijan_2017.csv", row.names = FALSE)

# Remove both objects before the next country section runs.
rm(data, df)




#Bangladesh

# Working directory for the Bangladesh STEPS microdata; modify according to
# your PC and the STEPS survey.
setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Bangladesh")


### READ DATA FILE
data <- read.csv("bgd2018.csv") # modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extract from the raw columns. with() resolves the bare
# column names inside `data` for this one call only, so nothing is left on the
# search path if the call fails part-way (attach()/detach() could not
# guarantee that, and a stale attached dataset could leak into later sections).
df <- with(data, data.frame(
  # Survey-level constants (length-one values are recycled to every row)
  study_id = "BGD_2018_STEPS_v01", # replace with STEPS id
  country = "Bangladesh", # full country name, first letter capital
  region = "Southeast Asia", # full WHO region name
  data_year = 2018, # year of data collection (if two, use the latest)
  coverage = "National", # "National" or "Subnational"

  # Sampling design variables and step weights
  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep3,

  survey_min_age = 18, # youngest age target
  survey_max_age = 69, # oldest age target

  # Participant-level demographics
  participant_id = pid, # if not available use 1:nrow(data)
  sex = sex,
  age = age, # years
  is_urban = urbanrural, # recoded later to urban = 1, rural = 0
  is_pregnant = m8,

  # Anthropometry
  weight = m12, # weight variable in kg
  height = m11, # height variable in cm
  waist = m14,

  # Blood pressure readings and hypertension questions
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,

  # Diabetes questions
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8,

  # Spot urine
  u_creatinine = b15, # spot urinary creatinine variable
  u_sodium = b14 # spot urinary sodium variable in mmol/l
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Always verify that men = 1 and women = 2.
# In this file sex is compared against the code 1: 1 stays 1, every other
# non-missing value becomes 2.
df$sex <- ifelse(df$sex == 1, 1, 2)

# Always verify the units and change if needed; height is taken as cm here,
# so convert it to metres.
df <- df %>% mutate(height = height / 100)

table(df$is_pregnant)
# Always verify that is_pregnant is coded 0 = no, 1 = pregnant.
# Blank every cell of df that equals "Inf" (inf means missing), then collapse
# is_pregnant to 1/0 and force it to 0 for men (sex == 1).
df[df == "Inf"] <- NA
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

# Recode urban = 1, rural = 0 if necessary: the label "Urban" becomes 1, any
# other non-missing value becomes 0.
table(df$is_urban)
df$is_urban <- ifelse(df$is_urban == "Urban", 1, 0)


### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal values (non-positive, "888"/"999" codes) to NA.
# Height is already in metres, so its upper cut-off is 5 (this also covers the
# scaled 8.88 / 9.99 codes); waist and weight use 888 (which also covers 999).
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1/0, then set self_hyper to 0 for participants
# whose bp_measured is 0.
df <- df %>%
  mutate(
    self_hyper = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same pattern: 1/0, then 0 whenever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Readings of 888 or above are treated as missing codes.
df <- df %>%
  mutate(across(
    c(sbp1, sbp2, sbp3, dbp1, dbp2, dbp3),
    function(reading) ifelse(reading >= 888, NA, reading)
  ))



table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Diabetes questions follow the same 1/0 and gating pattern as hypertension.
df <- df %>%
  mutate(
    self_diabetes = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )

## URINARY VARIABLES

# Always verify that creatinine is in the mmol range (generally < 50); the
# factor below converts from mg/dl (100-400 range).
summary(df$u_creatinine)

df <- df %>% mutate(u_creatinine = u_creatinine * 0.08842)


### FINAL INSPECTION
# View() opens a data viewer and may fail outside an interactive session;
# skip it in batch runs so the export below is still reached.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction builds a data frame with 34 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/Extracted") # modify according to your PC
# Output file name: country full name and year.
# NOTE(review): the file name says 2019 while this section sets data_year to
# 2018 -- confirm which year is right before relying on the name.
write.csv(df, "Bangladesh_2019.csv", row.names = FALSE)

# Remove both objects before the next country section runs.
rm(data, df)



##Belarus
# Working directory for the Belarus STEPS microdata; modify according to your
# PC and the STEPS survey.
setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Belarus")


### READ DATA FILE
data <- read.csv("blr2016.csv") # modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extract from the raw columns. with() resolves the bare
# column names inside `data` for this one call only, so nothing is left on the
# search path if the call fails part-way (attach()/detach() could not
# guarantee that, and a stale attached dataset could leak into later sections).
df <- with(data, data.frame(
  # Survey-level constants (length-one values are recycled to every row)
  study_id = "BLR_2016_STEPS_v01", # replace with STEPS id
  country = "Belarus", # full country name, first letter capital
  region = "Europe", # full WHO region name
  data_year = 2017, # year of data collection (if two, use the latest)
  coverage = "National", # "National" or "Subnational"

  # Sampling design variables and step weights
  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep3,

  survey_min_age = 18, # youngest age target
  survey_max_age = 69, # oldest age target

  # Participant-level demographics
  participant_id = pid, # if not available use 1:nrow(data)
  sex = sex,
  age = age, # years
  is_urban = NA, # no urban/rural variable is mapped for this survey
  is_pregnant = m8,

  # Anthropometry
  weight = m12, # weight variable in kg
  height = m11, # height variable in cm
  waist = m14,

  # Blood pressure readings and hypertension questions
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,

  # Diabetes questions
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8,

  # Spot urine
  u_creatinine = b15, # spot urinary creatinine variable in mmol/l
  u_sodium = b14 # spot urinary sodium variable in mmol/l
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Always verify that men = 1 and women = 2.
# The label "Men" becomes 1; every other non-missing value becomes 2.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Always verify the units and change if needed; height is taken as cm here,
# so convert it to metres.
df <- df %>% mutate(height = height / 100)

table(df$is_pregnant)
# Always verify that is_pregnant is coded 0 = no, 1 = pregnant.
# Blank every cell of df that equals "Inf" (inf means missing), then collapse
# is_pregnant to 1/0 and force it to 0 for men (sex == 1).
df[df == "Inf"] <- NA
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

# is_urban was set to NA at extraction, so it is only tabulated, not recoded.
table(df$is_urban)

### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal values (non-positive, "888"/"999" codes) to NA.
# Height is already in metres, so its upper cut-off is 5 (this also covers the
# scaled 8.88 / 9.99 codes); waist and weight use 888 (which also covers 999).
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1/0, then set self_hyper to 0 for participants
# whose bp_measured is 0.
df <- df %>%
  mutate(
    self_hyper = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper, useNA = "always")
# Same pattern: 1/0, then 0 whenever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Readings of 888 or above are treated as missing codes.
df <- df %>%
  mutate(across(
    c(sbp1, sbp2, sbp3, dbp1, dbp2, dbp3),
    function(reading) ifelse(reading >= 888, NA, reading)
  ))


table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Diabetes questions follow the same 1/0 and gating pattern as hypertension.
df <- df %>%
  mutate(
    self_diabetes = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


## URINARY VARIABLES

# Always verify that creatinine is in the mmol range (generally < 50).
summary(df$u_creatinine)
# The value 777 is blanked to NA; rows already missing are left untouched.
df <- df %>%
  mutate(u_creatinine = replace(u_creatinine, which(u_creatinine == 777), NA))




### FINAL INSPECTION
# View() opens a data viewer and may fail outside an interactive session;
# skip it in batch runs so the export below is still reached.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction builds a data frame with 34 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/Extracted") # modify according to your PC
# Output file name: country full name and year.
write.csv(df, "Belarus_2017.csv", row.names = FALSE)

# Remove both objects before the next country section runs.
rm(data, df)


## BHUTAN

# Working directory for the Bhutan STEPS microdata; modify according to your
# PC and the STEPS survey.
setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Bhutan")


### READ DATA FILE
data <- read.csv("btn2014.csv") # modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extract from the raw columns. with() resolves the bare
# column names inside `data` for this one call only, so nothing is left on the
# search path if the call fails part-way (attach()/detach() could not
# guarantee that, and a stale attached dataset could leak into later sections).
df <- with(data, data.frame(
  # Survey-level constants (length-one values are recycled to every row)
  study_id = "BTN_2014_STEPS_v01", # replace with STEPS id
  country = "Bhutan", # full country name, first letter capital
  region = "Southeast Asia", # full WHO region name
  data_year = 2014, # year of data collection (if two, use the latest)
  coverage = "National", # "National" or "Subnational"

  # Sampling design variables and step weights
  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep3,

  survey_min_age = 18, # youngest age target
  survey_max_age = 69, # oldest age target

  # Participant-level demographics
  participant_id = pid, # if not available use 1:nrow(data)
  sex = sex,
  age = age, # years
  is_urban = stratum, # stratum doubles as the urban/rural source; recoded later
  is_pregnant = m8,

  # Anthropometry
  weight = m12, # weight variable in kg
  height = m11, # height variable in cm
  waist = m14,

  # Blood pressure readings and hypertension questions
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,

  # Diabetes questions
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8,

  # Spot urine
  u_creatinine = b15, # spot urinary creatinine variable
  u_sodium = b14 # spot urinary sodium variable in mmol/l
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Always verify that men = 1 and women = 2.
# The label "Men" becomes 1; every other non-missing value becomes 2.
df$sex <- ifelse(df$sex == "Men", 1, 2)

# Always verify the units and change if needed; height is taken as cm here,
# so convert it to metres.
df <- df %>% mutate(height = height / 100)

table(df$is_pregnant)
# Always verify that is_pregnant is coded 0 = no, 1 = pregnant.
# Blank every cell of df that equals "Inf" (inf means missing), then collapse
# is_pregnant to 1/0 and force it to 0 for men (sex == 1).
df[df == "Inf"] <- NA
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant)
  )
table(df$is_pregnant, df$sex)

# Recode urban = 1, rural = 0 if necessary: the value 1 stays 1, any other
# non-missing value becomes 0.
# NOTE(review): is_urban was filled from stratum -- confirm that code 1 is urban.
table(df$is_urban)
df$is_urban <- ifelse(df$is_urban == 1, 1, 0)

### ANTHROPOMETRIC VARIABLES
summary(df)

# Recode abnormal values (non-positive, "888"/"999" codes) to NA.
# Height is already in metres, so its upper cut-off is 5 (this also covers the
# scaled 8.88 / 9.99 codes); waist and weight use 888 (which also covers 999).
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Collapse both answers to 1/0, then set self_hyper to 0 for participants
# whose bp_measured is 0.
df <- df %>%
  mutate(
    self_hyper = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same pattern: 1/0, then 0 whenever self_hyper is 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )


# Readings of 888 or above are treated as missing codes.
df <- df %>%
  mutate(across(
    c(sbp1, sbp2, sbp3, dbp1, dbp2, dbp3),
    function(reading) ifelse(reading >= 888, NA, reading)
  ))

table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Diabetes questions follow the same 1/0 and gating pattern as hypertension.
df <- df %>%
  mutate(
    self_diabetes = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )


## URINARY VARIABLES

# Always verify that creatinine is in the mmol range (generally < 50); the
# factor below converts from mg/dl (100-400 range).
summary(df$u_creatinine)

df <- df %>% mutate(u_creatinine = u_creatinine * 0.08842)


### FINAL INSPECTION
# View() opens a data viewer and may fail outside an interactive session;
# skip it in batch runs so the export below is still reached.
if (interactive()) {
  View(df) # check your changes
}
summary(df) # verify abnormal values (convert them to NA), "inf" value is acceptable


### SAVE FINAL DATASET
# The extraction builds a data frame with 34 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/Extracted") # modify according to your PC
# Output file name: country full name and year.
write.csv(df, "Bhutan_2014.csv", row.names = FALSE)

# Remove both objects before the next country section runs.
rm(data, df)


##Brunei Darussalam

# NOTE(review): some other sections read from ".../STEPS/Data/STEPS microdata/...";
# confirm that this path (without "Data") is the intended one.
setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Brunei") # modify according to your PC and the STEPS folder


### READ DATA FILE
data <- read.csv("brn2015.csv") # modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised data frame from the raw STEPS columns.
# with() resolves the column names inside `data` directly, so nothing is left
# attached to the search path if data.frame() fails, and a global variable
# with the same name as a column cannot be picked up by mistake.
df <- with(data, data.frame(
  study_id = "BRN_2015_STEPS_v01", # STEPS id
  country = "Brunei Darussalam", # full country name
  region = "Western Pacific", # full WHO region name
  # NOTE(review): id and file name say 2015; confirm 2016 is the latest year
  # of data collection (rule: if two years, use the latest).
  data_year = 2016,
  coverage = "National", # "National" or "Subnational"

  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep3,

  survey_min_age = 18, # youngest age target
  survey_max_age = 69, # oldest age target

  participant_id = pid, # if not available use 1:nrow(data)
  sex = sex,
  age = age, # years
  is_urban = NA, # no urban/rural variable is extracted for this survey
  is_pregnant = m8,

  weight = m12, # kg
  height = m11, # cm here; converted to m in the recode step
  waist = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8,

  u_creatinine = b15, # spot urinary creatinine
  u_sodium = b14 # spot urinary sodium
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Code sex as men = 1 / women = 2 (every non-missing value other than "Men"
# becomes 2) and convert height from cm to m. Always verify coding and units.
df <- df %>%
  mutate(
    sex = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant, df$sex)
# Target coding for is_pregnant: 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # inf means missing
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant) # rows with sex == 1 get 0
  )
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode urban = 1, rural = 0 if necessary

### ANTHROPOMETRIC VARIABLES
summary(df)

# Abnormal measurements (non-positive values, 888/999 codes) become NA.
# Height was divided by 100 above, so its upper cut-off is 5 rather than 888.
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary recode: 1 stays 1, any other non-missing value becomes 0.
# Rows with bp_measured == 0 are then forced to self_hyper = 0.
df <- df %>%
  mutate(
    self_hyper = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same binary recode; rows with self_hyper == 0 are forced to drug_hyper = 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood pressure readings of 888 or above are set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Binary recode, then rows with glucose_measured == 0 get self_diabetes = 0.
df <- df %>%
  mutate(
    self_diabetes = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
# Binary recode, then rows with self_diabetes == 0 get drug_diabetes = 0.
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )

## URINARY VARIABLES

# Always verify that creatinine is in the mmol range (generally < 50).
summary(df$u_creatinine)

# mg/dl -> mmol/l conversion (for values in the 100-400 range); left disabled
# for this dataset:
#df$u_creatinine<-df$u_creatinine*0.08842


### FINAL INSPECTION
View(df) # open the viewer to check the recodes
summary(df) # look for abnormal values (convert them to NA); "inf" is acceptable



### SAVE FINAL DATASET
# The extraction builds a data frame with 34 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/Extracted") # modify according to your PC
# File name convention: country full name and year, e.g. "Afghanistan_2018.csv".
write.csv(x = df, file = "Brunei_2015.csv", row.names = FALSE)

# Remove the per-country objects before the next section runs.
rm(list = c("data", "df"))


##GEORGIA

# NOTE(review): some other sections read from ".../STEPS/Data/STEPS microdata/...";
# confirm that this path (without "Data") is the intended one.
setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Georgia") # modify according to your PC and the STEPS folder


### READ DATA FILE
data <- read.csv("geo2016.csv") # modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised data frame from the raw STEPS columns.
# with() resolves the column names inside `data` directly, so nothing is left
# attached to the search path if data.frame() fails, and a global variable
# with the same name as a column cannot be picked up by mistake.
df <- with(data, data.frame(
  study_id = "GEO_2016_STEPS_v01", # STEPS id
  country = "Georgia", # full country name
  region = "Europe", # full WHO region name
  data_year = 2016, # year of data collection (if two, use the latest)
  coverage = "National", # "National" or "Subnational"

  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstepurine, # this file's step-3 weight column is named wstepurine

  survey_min_age = 18, # youngest age target
  survey_max_age = 69, # oldest age target

  participant_id = pid, # if not available use 1:nrow(data)
  # NOTE(review): sex is taken from c1 and is not recoded later in this
  # section; confirm c1 is already coded men = 1, women = 2.
  sex = c1,
  age = age, # years
  is_urban = NA, # no urban/rural variable is extracted for this survey
  is_pregnant = m8,

  weight = m12, # kg
  height = m11, # cm here; converted to m in the recode step
  waist = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8,

  u_creatinine = b15, # spot urinary creatinine
  u_sodium = b14 # spot urinary sodium
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Always verify that men = 1 and women = 2.
# NOTE(review): unlike the other country sections, no sex recode is applied
# here, yet `sex == 1` is used below; confirm the source column is already 1/2.

# Convert height from cm to m (always verify the units first).
df <- df %>% mutate(height = height / 100)

table(df$is_pregnant, df$sex)
# Target coding for is_pregnant: 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # inf means missing
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant) # rows with sex == 1 get 0
  )
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode urban = 1, rural = 0 if necessary

### ANTHROPOMETRIC VARIABLES
summary(df)

# Abnormal measurements (non-positive values, 888/999 codes) become NA.
# Height was divided by 100 above, so its upper cut-off is 5 rather than 888.
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary recode: 1 stays 1, any other non-missing value becomes 0.
# Rows with bp_measured == 0 are then forced to self_hyper = 0.
df <- df %>%
  mutate(
    self_hyper = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same binary recode; rows with self_hyper == 0 are forced to drug_hyper = 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood pressure readings of 888 or above are set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Binary recode, then rows with glucose_measured == 0 get self_diabetes = 0.
df <- df %>%
  mutate(
    self_diabetes = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
# Binary recode, then rows with self_diabetes == 0 get drug_diabetes = 0.
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )



## URINARY VARIABLES

# Always verify that creatinine is in the mmol range (generally < 50).
# The factor 0.08842 converts mg/dl (100-400 range) to mmol/l.
summary(df$u_creatinine)

df <- df %>% mutate(u_creatinine = u_creatinine * 0.08842)


### FINAL INSPECTION
View(df) # open the viewer to check the recodes
summary(df) # look for abnormal values (convert them to NA); "inf" is acceptable



### SAVE FINAL DATASET
# The extraction builds a data frame with 34 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/Extracted") # modify according to your PC
# File name convention: country full name and year, e.g. "Afghanistan_2018.csv".
write.csv(x = df, file = "Georgia_2016.csv", row.names = FALSE)

# Remove the per-country objects before the next section runs.
rm(list = c("data", "df"))


##Jordan
# NOTE(review): some other sections read from ".../STEPS/Data/STEPS microdata/...";
# confirm that this path (without "Data") is the intended one.
setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Jordania") # modify according to your PC and the STEPS folder


### READ DATA FILE
data <- read.csv("jor2019.csv") # modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised data frame from the raw STEPS columns.
# with() resolves the column names inside `data` directly, so nothing is left
# attached to the search path if data.frame() fails, and a global variable
# with the same name as a column cannot be picked up by mistake.
df <- with(data, data.frame(
  study_id = "JOR_2019_STEPS_v01", # STEPS id
  country = "Jordan", # full country name
  region = "Eastern Mediterranean", # full WHO region name
  data_year = 2019, # year of data collection (if two, use the latest)
  coverage = "National", # "National" or "Subnational"

  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep_lab, # this file's step-3 weight column is named wstep_lab

  survey_min_age = 18, # youngest age target
  survey_max_age = 69, # oldest age target

  participant_id = id, # if not available use 1:nrow(data)
  sex = sex,
  age = age, # years
  is_urban = urbanrural, # recoded to urban = 1, rural = 0 in the recode step
  is_pregnant = m8,

  weight = m12, # kg
  height = m11, # cm here; converted to m in the recode step
  waist = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8,

  u_creatinine = b15, # spot urinary creatinine
  u_sodium = b14 # spot urinary sodium
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Code sex as men = 1 / women = 2 (every non-missing value other than "Men"
# becomes 2) and convert height from cm to m. Always verify coding and units.
df <- df %>%
  mutate(
    sex = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Target coding for is_pregnant: 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # inf means missing
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant) # rows with sex == 1 get 0
  )
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode urban = 1, rural = 0 if necessary
# "U" becomes 1; every other non-missing value becomes 0.
df <- df %>% mutate(is_urban = ifelse(is_urban == "U", 1, 0))
### ANTHROPOMETRIC VARIABLES
summary(df)

# Abnormal measurements (non-positive values, 888/999 codes) become NA.
# Height was divided by 100 above, so its upper cut-off is 5 rather than 888.
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary recode: 1 stays 1, any other non-missing value becomes 0.
# Rows with bp_measured == 0 are then forced to self_hyper = 0.
df <- df %>%
  mutate(
    self_hyper = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same binary recode; rows with self_hyper == 0 are forced to drug_hyper = 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood pressure readings of 888 or above are set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Binary recode, then rows with glucose_measured == 0 get self_diabetes = 0.
df <- df %>%
  mutate(
    self_diabetes = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
# Binary recode, then rows with self_diabetes == 0 get drug_diabetes = 0.
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )

## URINARY VARIABLES

# Always verify that creatinine is in the mmol range (generally < 50).
summary(df$u_creatinine)

# mg/dl -> mmol/l conversion (for values in the 100-400 range); left disabled
# for this dataset:
#df$u_creatinine<-df$u_creatinine*0.08842


### FINAL INSPECTION
View(df) # open the viewer to check the recodes
summary(df) # look for abnormal values (convert them to NA); "inf" is acceptable



### SAVE FINAL DATASET
# The extraction builds a data frame with 34 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/Extracted") # modify according to your PC
# File name convention: country full name and year, e.g. "Afghanistan_2018.csv".
write.csv(x = df, file = "Jordan_2019.csv", row.names = FALSE)

# Remove the per-country objects before the next section runs.
rm(list = c("data", "df"))


##LEBANON
# NOTE(review): some other sections read from ".../STEPS/Data/STEPS microdata/...";
# confirm that this path (without "Data") is the intended one.
setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Lebanon") # modify according to your PC and the STEPS folder


### READ DATA FILE
data <- read.csv("lbn2017-lebanese.csv") # modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised data frame from the raw STEPS columns.
# with() resolves the column names inside `data` directly, so nothing is left
# attached to the search path if data.frame() fails, and a global variable
# with the same name as a column cannot be picked up by mistake.
df <- with(data, data.frame(
  study_id = "LBN_2017_STEPS_v01", # STEPS id
  country = "Lebanon", # full country name
  region = "Eastern Mediterranean", # full WHO region name
  data_year = 2017, # year of data collection (if two, use the latest)
  coverage = "National", # "National" or "Subnational"

  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep3,

  survey_min_age = 18, # youngest age target
  survey_max_age = 69, # oldest age target

  participant_id = pid, # if not available use 1:nrow(data)
  sex = sex,
  age = age, # years
  is_urban = NA, # no urban/rural variable is extracted for this survey
  is_pregnant = m8,

  weight = m12, # kg
  height = m11, # cm here; converted to m in the recode step
  waist = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8,

  u_creatinine = b15, # spot urinary creatinine
  u_sodium = b14 # spot urinary sodium
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Code sex as men = 1 / women = 2 (every non-missing value other than "Men"
# becomes 2) and convert height from cm to m. Always verify coding and units.
df <- df %>%
  mutate(
    sex = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Target coding for is_pregnant: 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # inf means missing
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant) # rows with sex == 1 get 0
  )
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode urban = 1, rural = 0 if necessary

### ANTHROPOMETRIC VARIABLES
summary(df)

# Abnormal measurements (non-positive values, 888/999 codes) become NA.
# Height was divided by 100 above, so its upper cut-off is 5 rather than 888.
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary recode: 1 stays 1, any other non-missing value becomes 0.
# Rows with bp_measured == 0 are then forced to self_hyper = 0.
df <- df %>%
  mutate(
    self_hyper = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same binary recode; rows with self_hyper == 0 are forced to drug_hyper = 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood pressure readings of 888 or above are set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Binary recode, then rows with glucose_measured == 0 get self_diabetes = 0.
df <- df %>%
  mutate(
    self_diabetes = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
# Binary recode, then rows with self_diabetes == 0 get drug_diabetes = 0.
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )

## URINARY VARIABLES

# Always verify that creatinine is in the mmol range (generally < 50).
# The factor 0.08842 converts mg/dl (100-400 range) to mmol/l.
summary(df$u_creatinine)

df <- df %>% mutate(u_creatinine = u_creatinine * 0.08842)


### FINAL INSPECTION
View(df) # open the viewer to check the recodes
summary(df) # look for abnormal values (convert them to NA); "inf" is acceptable



### SAVE FINAL DATASET
# The extraction builds a data frame with 34 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/Extracted") # modify according to your PC
# File name convention: country full name and year, e.g. "Afghanistan_2018.csv".
write.csv(x = df, file = "Lebanon_2017.csv", row.names = FALSE)

# Remove the per-country objects before the next section runs.
rm(list = c("data", "df"))


##MALAWI

setwd("~/Desktop/Artículos/STEPS/Data/STEPS microdata/Malawi") # modify according to your PC and the STEPS folder


### READ DATA FILE
data <- read.csv("mwi2017.csv") # modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised data frame from the raw STEPS columns.
# with() resolves the column names inside `data` directly, so nothing is left
# attached to the search path if data.frame() fails, and a global variable
# with the same name as a column cannot be picked up by mistake.
df <- with(data, data.frame(
  study_id = "MWI_2017_STEPS_v01", # STEPS id
  country = "Malawi", # full country name
  region = "Africa", # full WHO region name
  data_year = 2017, # year of data collection (if two, use the latest)
  coverage = "National", # "National" or "Subnational"

  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  wstep3 = wstep3,

  survey_min_age = 18, # youngest age target
  survey_max_age = 69, # oldest age target

  participant_id = id, # if not available use 1:nrow(data)
  sex = sex,
  age = age, # years
  is_urban = urbanrural, # recoded to urban = 1, rural = 0 in the recode step
  is_pregnant = m8,

  weight = m12, # kg
  height = m11, # cm here; converted to m in the recode step
  waist = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8,

  u_creatinine = b15, # spot urinary creatinine
  u_sodium = b14 # spot urinary sodium
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Code sex as men = 1 / women = 2 (every non-missing value other than "Men"
# becomes 2) and convert height from cm to m. Always verify coding and units.
df <- df %>%
  mutate(
    sex = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant, df$sex)
# Target coding for is_pregnant: 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # inf means missing
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant) # rows with sex == 1 get 0
  )
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode urban = 1, rural = 0 if necessary
# "Urban" becomes 1; every other non-missing value becomes 0.
df <- df %>% mutate(is_urban = ifelse(is_urban == "Urban", 1, 0))
### ANTHROPOMETRIC VARIABLES
summary(df)

# Abnormal measurements (non-positive values, 888/999 codes) become NA.
# Height was divided by 100 above, so its upper cut-off is 5 rather than 888.
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary recode: 1 stays 1, any other non-missing value becomes 0.
# Rows with bp_measured == 0 are then forced to self_hyper = 0.
df <- df %>%
  mutate(
    self_hyper = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same binary recode; rows with self_hyper == 0 are forced to drug_hyper = 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood pressure readings of 888 or above are set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Binary recode, then rows with glucose_measured == 0 get self_diabetes = 0.
df <- df %>%
  mutate(
    self_diabetes = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
# Binary recode, then rows with self_diabetes == 0 get drug_diabetes = 0.
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )

## URINARY VARIABLES

# Always verify that creatinine is in the mmol range (generally < 50).
summary(df$u_creatinine)
# Values equal to 777 are set to NA in both urinary variables; creatinine is
# then multiplied by 0.08842 (mg/dl -> mmol/l).
df <- df %>%
  mutate(
    u_creatinine = replace(u_creatinine, u_creatinine == 777, NA) * 0.08842,
    u_sodium = replace(u_sodium, u_sodium == 777, NA)
  )


### FINAL INSPECTION
View(df) # open the viewer to check the recodes
summary(df) # look for abnormal values (convert them to NA); "inf" is acceptable



### SAVE FINAL DATASET
# The extraction builds a data frame with 34 variables; the number of
# observations should be the same as in "data".
setwd("~/Desktop/Artículos/STEPS/Extracted") # modify according to your PC
# File name convention: country full name and year, e.g. "Afghanistan_2018.csv".
write.csv(x = df, file = "Malawi_2017.csv", row.names = FALSE)

# Remove the per-country objects before the next section runs.
rm(list = c("data", "df"))

##MONGOLIA 2019

# NOTE(review): some other sections read from ".../STEPS/Data/STEPS microdata/...";
# confirm that this path (without "Data") is the intended one.
setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Mongolia2019") # modify according to your PC and the STEPS folder


### READ DATA FILE
data <- read.csv("mng2019.csv") # modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised data frame from the raw STEPS columns.
# with() resolves the column names inside `data` directly, so nothing is left
# attached to the search path if data.frame() fails, and a global variable
# with the same name as a column cannot be picked up by mistake.
df <- with(data, data.frame(
  study_id = "MNG_2019_STEPS_v01", # STEPS id
  country = "Mongolia", # full country name
  region = "Western Pacific", # full WHO region name
  data_year = 2019, # year of data collection (if two, use the latest)
  coverage = "National", # "National" or "Subnational"

  psu = psu,
  stratum = stratum,
  wstep1 = wstep1,
  wstep2 = wstep2,
  # NOTE(review): step-3 weight is read from wstep4 here; confirm this is the
  # weight that applies to the urine measurements in this dataset.
  wstep3 = wstep4,

  survey_min_age = 15, # youngest age target
  survey_max_age = 69, # oldest age target

  participant_id = pid, # if not available use 1:nrow(data)
  sex = sex,
  age = age, # years
  is_urban = area, # recoded to urban = 1, rural = 0 in the recode step
  is_pregnant = m8,

  weight = m12, # kg
  height = m11, # cm here; converted to m in the recode step
  waist = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8,

  u_creatinine = b15, # spot urinary creatinine
  u_sodium = b14 # spot urinary sodium
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Code sex as men = 1 / women = 2 (every non-missing value other than "Men"
# becomes 2) and convert height from cm to m. Always verify coding and units.
df <- df %>%
  mutate(
    sex = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Target coding for is_pregnant: 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # inf means missing
df <- df %>%
  mutate(
    is_pregnant = ifelse(is_pregnant == 1, 1, 0),
    is_pregnant = ifelse(sex == 1, 0, is_pregnant) # rows with sex == 1 get 0
  )
table(df$is_pregnant, df$sex)

table(df$is_urban) # recode urban = 1, rural = 0 if necessary
# "Urban" becomes 1; every other non-missing value becomes 0.
df <- df %>% mutate(is_urban = ifelse(is_urban == "Urban", 1, 0))


### ANTHROPOMETRIC VARIABLES
summary(df)

# Abnormal measurements (non-positive values, 888/999 codes) become NA.
# Height was divided by 100 above, so its upper cut-off is 5 rather than 888.
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Binary recode: 1 stays 1, any other non-missing value becomes 0.
# Rows with bp_measured == 0 are then forced to self_hyper = 0.
df <- df %>%
  mutate(
    self_hyper = ifelse(self_hyper == 1, 1, 0),
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, self_hyper)
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# Same binary recode; rows with self_hyper == 0 are forced to drug_hyper = 0.
df <- df %>%
  mutate(
    drug_hyper = ifelse(drug_hyper == 1, 1, 0),
    drug_hyper = ifelse(self_hyper == 0, 0, drug_hyper)
  )

# Blood pressure readings of 888 or above are set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Binary recode, then rows with glucose_measured == 0 get self_diabetes = 0.
df <- df %>%
  mutate(
    self_diabetes = ifelse(self_diabetes == 1, 1, 0),
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, self_diabetes)
  )
table(df$drug_diabetes, useNA = "always")
# Binary recode, then rows with self_diabetes == 0 get drug_diabetes = 0.
df <- df %>%
  mutate(
    drug_diabetes = ifelse(drug_diabetes == 1, 1, 0),
    drug_diabetes = ifelse(self_diabetes == 0, 0, drug_diabetes)
  )

## URINARY VARIABLES

# Always verify that creatinine is in the mmol range (generally < 50).
summary(df$u_creatinine)
# Creatinine is scaled by 0.001 for this dataset.
# NOTE(review): presumably the source values are in umol/l - confirm.
df <- df %>% mutate(u_creatinine = u_creatinine * 0.001)
summary(df$u_creatinine)

### FINAL INSPECTION
View(df) #check your changes
summary(df)   #verify abnormal values (convert them to NA), "inf" value is acceptable



### SAVE FINAL DATASET
#We should have a dataframe with 30 variables as the result of the extraction, number of observations should be the same as "data"
setwd("~/Desktop/Artículos/STEPS/Extracted")   #modify according to your PC
write.csv(df, "Mongolia_2019.csv", row.names = FALSE)   #create new csv file, use country full name and year, for example "Afghanistan_2018.csv"

rm(data, df)



## MONGOLIA 2013

setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Mongolia2013") #modify according to your PC


### READ DATA FILE
data <- read.csv("MNG2013.csv") #modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extract: 5 survey descriptors, 5 design variables,
# 2 age limits and 22 participant-level variables (34 columns in total).
# with() resolves the column names inside `data` for this one call only, so
# nothing stays on the search path if the call fails and a global object of
# the same name cannot mask a survey column (both are risks of
# attach()/detach()).
df <- with(data, data.frame(
  study_id  = "MNG_2013_STEPS_v01", # STEPS id
  country   = "Mongolia",           # full country name
  region    = "Western Pacific",    # WHO region
  data_year = 2013,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 15, # youngest age targeted
  survey_max_age = 64, # oldest age targeted

  participant_id = pid,
  sex            = sex,
  age            = age,     # years
  is_urban       = stratum, # urban/rural is taken from the stratum variable
  is_pregnant    = m5,

  weight = m4,
  height = m3, # divided by 100 in the CHECK AND RECODE step
  waist  = m7,

  sbp1 = m11a,
  sbp2 = m12a,
  sbp3 = m13a,
  dbp1 = m11b,
  dbp2 = m12b,
  dbp3 = m13b,
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8,

  u_creatinine = x24, # spot urinary creatinine; unit handled further down
  u_sodium     = x22  # spot urinary sodium
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: every non-missing value other than
# "Men" becomes 2, so verify the raw labels first.
# Height is divided by 100 (cm -> m); verify the raw unit before relying on it.
df <- df %>%
  mutate(
    sex = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Target coding is 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # Inf means missing
# Men are set to 0; otherwise 1 stays 1 and other non-missing codes become 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # target coding: urban = 1, rural = 0
df <- df %>% mutate(is_urban = ifelse(is_urban == 1, 1, 0))

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 placeholder codes become NA. Height is
# already divided by 100 here, so anything of 5 or more (which includes 8.88
# and 9.99) is dropped.
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Both flags: 1 stays 1, any other non-missing code becomes 0. mutate()
# evaluates its arguments in order, so self_hyper sees the recoded
# bp_measured; where bp_measured is 0, self_hyper is forced to 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# drug_hyper is forced to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more are treated as placeholder codes and set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for blood pressure: 1 stays 1, other codes become 0, and
# self_diabetes is forced to 0 where glucose_measured is 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(drug_diabetes = ifelse(self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)))

## URINARY VARIABLES

# Creatinine must end up in mmol/l (generally < 50). The mg/dl conversion
# (factor 0.08842, for raw values in the 100-400 range) is deliberately not
# applied; values are multiplied by 0.001 instead.
# NOTE(review): presumably the raw values are in umol/l; confirm against the
# first summary() below.
summary(df$u_creatinine)
df <- df %>% mutate(u_creatinine = u_creatinine * 0.001)
summary(df$u_creatinine)



### FINAL INSPECTION
View(df) # check your changes
summary(df) # convert abnormal values to NA; an "inf" value is acceptable



### SAVE FINAL DATASET
# The extract built in the EXTRACT DATA step has 34 columns (not 30), and df
# should have as many rows as "data".
setwd("~/Desktop/Artículos/STEPS/Extracted")   #modify according to your PC
# File name pattern: country full name and year, e.g. "Afghanistan_2018.csv".
write.csv(df, file = "Mongolia_2013.csv", row.names = FALSE)

rm(list = c("data", "df"))


## MOROCCO
setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Morocco") #modify according to your PC


### READ DATA FILE
data <- read.csv("mar2017.csv") #modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extract: 5 survey descriptors, 5 design variables,
# 2 age limits and 22 participant-level variables (34 columns in total).
# with() resolves the column names inside `data` for this one call only, so
# nothing stays on the search path if the call fails and a global object of
# the same name cannot mask a survey column (both are risks of
# attach()/detach()).
df <- with(data, data.frame(
  study_id  = "MAR_2017_STEPS_v01",    # STEPS id
  country   = "Morocco",               # full country name
  region    = "Eastern Mediterranean", # WHO region
  data_year = 2017,                    # year of data collection (latest if two)
  coverage  = "National",              # "National" or "Subnational"

  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  # NOTE(review): every other country in this file maps wstep3 to wstep3;
  # confirm that wstep4 is the intended weight for this dataset.
  wstep3  = wstep4,

  survey_min_age = 18, # youngest age targeted
  survey_max_age = NA, # no upper age limit recorded for this survey

  participant_id = pid,
  sex            = sex,
  age            = age,     # years
  is_urban       = stratum, # urban/rural is taken from the stratum variable
  is_pregnant    = m8,

  weight = m12,
  height = m11, # divided by 100 in the CHECK AND RECODE step
  waist  = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8,

  u_creatinine = b15, # spot urinary creatinine; unit checked further down
  u_sodium     = b14  # spot urinary sodium
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: every non-missing value other than
# "Men" becomes 2, so verify the raw labels first.
# Height is divided by 100 (cm -> m); verify the raw unit before relying on it.
df <- df %>%
  mutate(
    sex = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant, df$sex)
# Target coding is 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # Inf means missing
# Men are set to 0; otherwise 1 stays 1 and other non-missing codes become 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # target coding: urban = 1, rural = 0
# Here raw value 1 becomes 0 and every other non-missing value becomes 1.
# NOTE(review): confirm against the codebook that 1 means rural in this survey.
df <- df %>% mutate(is_urban = ifelse(is_urban == 1, 0, 1))

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 placeholder codes become NA. Height is
# already divided by 100 here, so anything of 5 or more (which includes 8.88
# and 9.99) is dropped.
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Both flags: 1 stays 1, any other non-missing code becomes 0. mutate()
# evaluates its arguments in order, so self_hyper sees the recoded
# bp_measured; where bp_measured is 0, self_hyper is forced to 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# drug_hyper is forced to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more are treated as placeholder codes and set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for blood pressure: 1 stays 1, other codes become 0, and
# self_diabetes is forced to 0 where glucose_measured is 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(drug_diabetes = ifelse(self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)))

## URINARY VARIABLES

# Creatinine must be in mmol/l (generally < 50). No conversion is applied for
# this survey; the summary is printed so the range can be checked by eye.
summary(df$u_creatinine)



### FINAL INSPECTION
View(df) # check your changes
summary(df) # convert abnormal values to NA; an "inf" value is acceptable



### SAVE FINAL DATASET
# The extract built in the EXTRACT DATA step has 34 columns (not 30), and df
# should have as many rows as "data".
setwd("~/Desktop/Artículos/STEPS/Extracted")   #modify according to your PC
# File name pattern: country full name and year, e.g. "Afghanistan_2018.csv".
write.csv(df, file = "Morocco_2017.csv", row.names = FALSE)

rm(list = c("data", "df"))



## NEPAL
setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Nepal") #modify according to your PC


### READ DATA FILE
data <- read.csv("npl2019.csv") #modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extract: 5 survey descriptors, 5 design variables,
# 2 age limits and 22 participant-level variables (34 columns in total).
# with() resolves the column names inside `data` for this one call only, so
# nothing stays on the search path if the call fails and a global object of
# the same name cannot mask a survey column (both are risks of
# attach()/detach()).
df <- with(data, data.frame(
  study_id  = "NPL_2019_STEPS_v01", # STEPS id
  country   = "Nepal",              # full country name
  region    = "Southeast Asia",     # WHO region
  data_year = 2019,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 15, # youngest age targeted
  survey_max_age = 69, # oldest age targeted

  participant_id = pid,
  sex            = sex,
  age            = age,     # years
  is_urban       = stratum, # urban/rural is taken from the stratum variable
  is_pregnant    = m8,

  weight = m12,
  height = m11, # divided by 100 in the CHECK AND RECODE step
  waist  = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8,

  u_creatinine = b15, # spot urinary creatinine; unit handled further down
  u_sodium     = b14  # spot urinary sodium
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: code 1 stays 1 and every other
# non-missing value becomes 2, so verify the raw codes first.
# Height is divided by 100 (cm -> m); verify the raw unit before relying on it.
df <- df %>%
  mutate(
    sex = ifelse(sex == 1, 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Target coding is 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # Inf means missing
# Men are set to 0; otherwise 1 stays 1 and other non-missing codes become 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # target coding: urban = 1, rural = 0
# Raw values 1 and 2 both count as urban; any other non-missing value is rural.
df <- df %>% mutate(is_urban = ifelse(is_urban == 1 | is_urban == 2, 1, 0))


### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 placeholder codes become NA. Height is
# already divided by 100 here, so anything of 5 or more (which includes 8.88
# and 9.99) is dropped.
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Both flags: 1 stays 1, any other non-missing code becomes 0. mutate()
# evaluates its arguments in order, so self_hyper sees the recoded
# bp_measured; where bp_measured is 0, self_hyper is forced to 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# drug_hyper is forced to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more are treated as placeholder codes and set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )

## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for blood pressure: 1 stays 1, other codes become 0, and
# self_diabetes is forced to 0 where glucose_measured is 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(drug_diabetes = ifelse(self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)))


## URINARY VARIABLES

# Creatinine must end up in mmol/l (generally < 50). The raw values are
# treated as mg/dl (100-400 range) and converted with the factor 0.08842.
summary(df$u_creatinine)

df <- df %>% mutate(u_creatinine = u_creatinine * 0.08842)


### FINAL INSPECTION
View(df) # check your changes
summary(df) # convert abnormal values to NA; an "inf" value is acceptable



### SAVE FINAL DATASET
# The extract built in the EXTRACT DATA step has 34 columns (not 30), and df
# should have as many rows as "data".
setwd("~/Desktop/Artículos/STEPS/Extracted")   #modify according to your PC
# File name pattern: country full name and year, e.g. "Afghanistan_2018.csv".
write.csv(df, file = "Nepal_2019.csv", row.names = FALSE)

rm(list = c("data", "df"))

## SOLOMON ISLANDS
setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Solomon Islands") #modify according to your PC


### READ DATA FILE
data <- read.csv("slb2015.csv") #modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extract: 5 survey descriptors, 5 design variables,
# 2 age limits and 22 participant-level variables (34 columns in total).
# with() resolves the column names inside `data` for this one call only, so
# nothing stays on the search path if the call fails and a global object of
# the same name cannot mask a survey column (both are risks of
# attach()/detach()).
df <- with(data, data.frame(
  study_id  = "SLB_2015_STEPS_v01", # STEPS id
  country   = "Solomon Islands",    # full country name
  region    = "Western Pacific",    # WHO region
  data_year = 2015,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18, # youngest age targeted
  survey_max_age = 69, # oldest age targeted

  participant_id = pid,
  sex            = sex,
  age            = age, # years
  is_urban       = NA,  # no urban/rural variable is extracted for this survey
  is_pregnant    = m8,

  weight = m12,
  height = m11, # divided by 100 in the CHECK AND RECODE step
  waist  = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8,

  u_creatinine = b15, # spot urinary creatinine; unit checked further down
  u_sodium     = b14  # spot urinary sodium
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: every non-missing value other than
# "Men" becomes 2, so verify the raw labels first.
# Height is divided by 100 (cm -> m); verify the raw unit before relying on it.
df <- df %>%
  mutate(
    sex = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Target coding is 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # Inf means missing
# Men are set to 0; otherwise 1 stays 1 and other non-missing codes become 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

# is_urban was extracted as NA for this survey, so there is nothing to recode.
table(df$is_urban)

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 placeholder codes become NA. Height is
# already divided by 100 here, so anything of 5 or more (which includes 8.88
# and 9.99) is dropped.
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Both flags: 1 stays 1, any other non-missing code becomes 0. mutate()
# evaluates its arguments in order, so self_hyper sees the recoded
# bp_measured; where bp_measured is 0, self_hyper is forced to 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# drug_hyper is forced to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more are treated as placeholder codes and set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for blood pressure: 1 stays 1, other codes become 0, and
# self_diabetes is forced to 0 where glucose_measured is 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(drug_diabetes = ifelse(self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)))

## URINARY VARIABLES

# Creatinine must be in mmol/l (generally < 50). No conversion is applied for
# this survey; the summary is printed so the range can be checked by eye.
summary(df$u_creatinine)

### FINAL INSPECTION
View(df) # check your changes
summary(df) # convert abnormal values to NA; an "inf" value is acceptable



### SAVE FINAL DATASET
# The extract built in the EXTRACT DATA step has 34 columns (not 30), and df
# should have as many rows as "data".
setwd("~/Desktop/Artículos/STEPS/Extracted")   #modify according to your PC
# File name pattern: country full name and year, e.g. "Afghanistan_2018.csv".
write.csv(df, file = "SolomonIslands_2015.csv", row.names = FALSE)

rm(list = c("data", "df"))

## SUDAN


setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Sudan") #modify according to your PC


### READ DATA FILE
data <- read.csv("sdn2016.csv") #modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extract: 5 survey descriptors, 5 design variables,
# 2 age limits and 22 participant-level variables (34 columns in total).
# with() resolves the column names inside `data` for this one call only, so
# nothing stays on the search path if the call fails and a global object of
# the same name cannot mask a survey column (both are risks of
# attach()/detach()).
df <- with(data, data.frame(
  study_id  = "SDN_2016_STEPS_v01",    # STEPS id
  country   = "Sudan",                 # full country name
  region    = "Eastern Mediterranean", # WHO region
  data_year = 2016,                    # year of data collection (latest if two)
  coverage  = "National",              # "National" or "Subnational"

  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18, # youngest age targeted
  survey_max_age = 69, # oldest age targeted

  participant_id = pid,
  sex            = sex,
  age            = age,     # years
  is_urban       = stratum, # urban/rural is taken from the stratum variable
  is_pregnant    = m8,

  weight = m12,
  height = m11, # divided by 100 in the CHECK AND RECODE step
  waist  = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8,

  u_creatinine = b15, # spot urinary creatinine; unit handled further down
  u_sodium     = b14  # spot urinary sodium
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: every non-missing value other than
# "Men" becomes 2, so verify the raw labels first.
# Height is divided by 100 (cm -> m); verify the raw unit before relying on it.
df <- df %>%
  mutate(
    sex = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Target coding is 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # Inf means missing
# Men are set to 0; otherwise 1 stays 1 and other non-missing codes become 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) # target coding: urban = 1, rural = 0
df <- df %>% mutate(is_urban = ifelse(is_urban == 1, 1, 0))


### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 placeholder codes become NA. Height is
# already divided by 100 here, so anything of 5 or more (which includes 8.88
# and 9.99) is dropped.
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Both flags: 1 stays 1, any other non-missing code becomes 0. mutate()
# evaluates its arguments in order, so self_hyper sees the recoded
# bp_measured; where bp_measured is 0, self_hyper is forced to 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# drug_hyper is forced to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more are treated as placeholder codes and set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for blood pressure: 1 stays 1, other codes become 0, and
# self_diabetes is forced to 0 where glucose_measured is 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(drug_diabetes = ifelse(self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)))


## URINARY VARIABLES

# Creatinine must end up in mmol/l (generally < 50). The raw values are
# treated as mg/dl (100-400 range) and converted with the factor 0.08842.
summary(df$u_creatinine)
df <- df %>% mutate(u_creatinine = u_creatinine * 0.08842)


### FINAL INSPECTION
View(df) # check your changes
summary(df) # convert abnormal values to NA; an "inf" value is acceptable



### SAVE FINAL DATASET
# The extract built in the EXTRACT DATA step has 34 columns (not 30), and df
# should have as many rows as "data".
setwd("~/Desktop/Artículos/STEPS/Extracted")   #modify according to your PC
# File name pattern: country full name and year, e.g. "Afghanistan_2018.csv".
write.csv(df, file = "Sudan_2016.csv", row.names = FALSE)

rm(list = c("data", "df"))


## TOKELAU

setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Tokelau") #modify according to your PC


### READ DATA FILE
data <- read.csv("TKL2014.csv") #modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extract: 5 survey descriptors, 5 design variables,
# 2 age limits and 22 participant-level variables (34 columns in total).
# with() resolves the column names inside `data` for this one call only, so
# nothing stays on the search path if the call fails and a global object of
# the same name cannot mask a survey column (both are risks of
# attach()/detach()).
df <- with(data, data.frame(
  study_id  = "TKL_2014_STEPS_v01", # STEPS id
  country   = "Tokelau",            # full country name
  region    = "Western Pacific",    # WHO region
  data_year = 2014,                 # year of data collection (latest if two)
  coverage  = "National",           # "National" or "Subnational"

  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18, # youngest age targeted
  survey_max_age = 64, # oldest age targeted

  participant_id = pid,
  sex            = sex,
  age            = age, # years
  is_urban       = NA,  # no urban/rural variable is extracted for this survey
  is_pregnant    = m8,

  weight = m12,
  height = m11, # divided by 100 in the CHECK AND RECODE step
  waist  = m14,

  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured      = h1,
  self_hyper       = h2a,
  drug_hyper       = h3,
  glucose_measured = h6,
  self_diabetes    = h7a,
  drug_diabetes    = h8,

  u_creatinine = b15, # spot urinary creatinine; cleaned further down
  u_sodium     = b14  # spot urinary sodium
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: every non-missing value other than
# "Men" becomes 2, so verify the raw labels first.
# Height is divided by 100 (cm -> m); verify the raw unit before relying on it.
df <- df %>%
  mutate(
    sex = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Target coding is 0 = no, 1 = pregnant.
df[df == "Inf"] <- NA # Inf means missing
# Men are set to 0; otherwise 1 stays 1 and other non-missing codes become 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

# is_urban was extracted as NA for this survey, so there is nothing to recode.
table(df$is_urban)

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive values and the 888/999 placeholder codes become NA. Height is
# already divided by 100 here, so anything of 5 or more (which includes 8.88
# and 9.99) is dropped.
df <- df %>%
  mutate(
    waist = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


## BP
table(df$bp_measured)
table(df$self_hyper)
# Both flags: 1 stays 1, any other non-missing code becomes 0. mutate()
# evaluates its arguments in order, so self_hyper sees the recoded
# bp_measured; where bp_measured is 0, self_hyper is forced to 0.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

table(df$drug_hyper)
# drug_hyper is forced to 0 wherever self_hyper is 0.
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more are treated as placeholder codes and set to NA.
df <- df %>%
  mutate(
    sbp1 = ifelse(sbp1 >= 888, NA, sbp1),
    sbp2 = ifelse(sbp2 >= 888, NA, sbp2),
    sbp3 = ifelse(sbp3 >= 888, NA, sbp3),
    dbp1 = ifelse(dbp1 >= 888, NA, dbp1),
    dbp2 = ifelse(dbp2 >= 888, NA, dbp2),
    dbp3 = ifelse(dbp3 >= 888, NA, dbp3)
  )


## DIABETES
table(df$glucose_measured, df$self_diabetes, useNA = "always")
# Same scheme as for blood pressure: 1 stays 1, other codes become 0, and
# self_diabetes is forced to 0 where glucose_measured is 0.
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes = ifelse(glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(drug_diabetes = ifelse(self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)))


## URINARY VARIABLES

# Creatinine must be in mmol/l (generally < 50). Only values of 7714 or more
# are blanked here; missing values are left untouched. The mg/dl conversion
# (factor 0.08842) is deliberately not applied.
# NOTE(review): the 7714 cut-off was presumably read off the summary below;
# confirm the unit of the remaining values.
summary(df$u_creatinine)
df <- df %>%
  mutate(u_creatinine = replace(u_creatinine, which(u_creatinine >= 7714), NA))


### FINAL INSPECTION
View(df) # check your changes
summary(df) # convert abnormal values to NA; an "inf" value is acceptable



### SAVE FINAL DATASET
# The extract built in the EXTRACT DATA step has 34 columns (not 30), and df
# should have as many rows as "data".
setwd("~/Desktop/Artículos/STEPS/Extracted")   #modify according to your PC
# File name pattern: country full name and year, e.g. "Afghanistan_2018.csv".
write.csv(df, file = "Tokelau_2014.csv", row.names = FALSE)

rm(list = c("data", "df"))


##TONGA

setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Tonga") #modify according to your PC and the STEPS 


### READ DATA FILE
data <- read.csv("tonga2017.csv") #modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extraction table from the raw survey columns.
# with() replaces attach()/detach(): names are resolved against the columns of
# `data` before any same-named object in the workspace, and nothing stays on the
# search path if data.frame() stops with an error (e.g. a column is absent).
df <- with(data, data.frame(
  study_id = "TON_2017_STEPS_v01",  #replace with STEPS id
  country = "Tonga",   #replace with full name of the country (first letter capital, e.g., Peru)
  region  = "Western Pacific",   #replace with full name of the WHO region (first letter capital)
  data_year = 2017,   #replace with year of data collection (if two, use the latest: 2016-2017 -> 2017)
  coverage = "National",  #replace with "National" or "Subnational"

  # survey design variables
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18,   #replace with the youngest age target
  survey_max_age = 69,   #replace with the oldest age target

  participant_id = pid,   #if not available use 1:nrow(data)
  sex            = sex,   #replace with sex variable
  age            = age,   #replace with age variable (years)
  is_urban       = NA,    #no urban/rural variable is extracted here; the column is all NA
  is_pregnant = m8,

  weight = m12,   #replace with weight variable in kg
  height = m11,   #replace with height variable in cm
  waist = m14,

  # three systolic / diastolic readings
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = NA,   #not extracted for this survey; the column is all NA
  drug_diabetes = NA,   #not extracted for this survey; the column is all NA

  u_creatinine = b15,  #replace with spot urinary creatinine variable in mmol/l
  u_sodium = b14  #replace with spot urinary sodium variable in mmol/l
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: rows labelled "Men" become 1 and every
# other non-missing label becomes 2, so check the raw labels first.
# Height is divided by 100 (cm -> m); verify the raw unit before running.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Any cell of df equal to "Inf" is treated as missing.
df[df == "Inf"] <- NA #inf means missing
# is_pregnant: 1 = code 1, 0 = any other recorded code; men are always 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

table(df$is_urban) #recode  urban = 1, rural = 0 if necessary

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive readings and 888/999-style placeholders become NA. Height is in
# metres by now, so its placeholders (8.88, 9.99) fall under the >= 5 cut-off.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


##BP
# Look at the raw codes before recoding.
table(df$bp_measured)
table(df$self_hyper)

# Both answers are collapsed to 1 = code 1, 0 = any other recorded code (NA stays
# NA). A participant whose BP was never measured is then set to 0 for self_hyper.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

# Treatment is recoded the same way and forced to 0 when self_hyper is 0.
table(df$drug_hyper)
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more in any of the six BP columns are placeholders -> NA.
df <- df %>%
  mutate(across(c(sbp1, sbp2, sbp3, dbp1, dbp2, dbp3), ~ ifelse(.x >= 888, NA, .x)))

# Only glucose_measured is recoded; self_diabetes is all NA in this extraction.
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>% mutate(glucose_measured = ifelse(glucose_measured == 1, 1, 0))


##URINARY VARIABLES

# Creatinine is expected in mmol/l (generally < 50). Values in the 100-400 range
# suggest mg/dl and would need converting.
summary(df$u_creatinine)


### FINAL INSPECTION
# Eyeball the recoded table, then scan the summary for leftover abnormal values
# (convert them to NA).
View(df)
summary(df)


### SAVE FINAL DATASET
# df should still hold every column created in the extraction step and have as
# many rows as `data`.
setwd("~/Desktop/Artículos/STEPS/Extracted")   #modify according to your PC
write.csv(df, file = "Tonga_2017.csv", row.names = FALSE)

# Drop both tables so the next survey starts from a clean workspace.
rm(list = c("data", "df"))



##turkmenistan

setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Turkmenistan") #modify according to your PC and the STEPS 


### READ DATA FILE
data <- read.csv("tkm2018.csv") #modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extraction table from the raw survey columns.
# with() replaces attach()/detach(): names are resolved against the columns of
# `data` before any same-named object in the workspace, and nothing stays on the
# search path if data.frame() stops with an error (e.g. a column is absent).
df <- with(data, data.frame(
  study_id = "TKM_2018_STEPS_v01",  #replace with STEPS id
  country = "Turkmenistan",   #replace with full name of the country (first letter capital, e.g., Peru)
  region  = "Europe",   #replace with full name of the WHO region (first letter capital)
  data_year = 2018,   #replace with year of data collection (if two, use the latest: 2016-2017 -> 2017)
  coverage = "National",  #replace with "National" or "Subnational"

  # survey design variables
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18,   #replace with the youngest age target
  survey_max_age = 69,   #replace with the oldest age target

  participant_id = pid,   #if not available use 1:nrow(data)
  sex            = sex,   #replace with sex variable
  age            = age,   #replace with age variable (years)
  is_urban       = urbanrural,   #replace with urban/rural variable (later change to urban=1, rural=0)
  is_pregnant = m8,

  weight = m12,   #replace with weight variable in kg
  height = m11,   #replace with height variable in cm
  waist = m14,

  # three systolic / diastolic readings
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8,

  u_creatinine = b15,  #replace with spot urinary creatinine variable in mmol/l
  u_sodium = b14  #replace with spot urinary sodium variable in mmol/l
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: rows labelled "Men" become 1 and every
# other non-missing label becomes 2, so check the raw labels first.
# Height is divided by 100 (cm -> m); verify the raw unit before running.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Any cell of df equal to "Inf" is treated as missing.
df[df == "Inf"] <- NA #inf means missing
# is_pregnant: 1 = code 1, 0 = any other recorded code; men are always 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

# Residence: rows labelled "Urban" become 1, any other recorded label 0.
table(df$is_urban)
df <- df %>% mutate(is_urban = ifelse(is_urban == "Urban", 1, 0))


### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive readings and 888/999-style placeholders become NA. Height is in
# metres by now, so its placeholders (8.88, 9.99) fall under the >= 5 cut-off.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


##BP
# Look at the raw codes before recoding.
table(df$bp_measured)
table(df$self_hyper)

# Both answers are collapsed to 1 = code 1, 0 = any other recorded code (NA stays
# NA). A participant whose BP was never measured is then set to 0 for self_hyper.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

# Treatment is recoded the same way and forced to 0 when self_hyper is 0.
table(df$drug_hyper)
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more in any of the six BP columns are placeholders -> NA.
df <- df %>%
  mutate(across(c(sbp1, sbp2, sbp3, dbp1, dbp2, dbp3), ~ ifelse(.x >= 888, NA, .x)))


# Glucose / diabetes: same 0/1 recode and the same gating chain as for BP
# (never measured -> self_diabetes 0; self_diabetes 0 -> drug_diabetes 0).
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(drug_diabetes = ifelse(self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)))


##URINARY VARIABLES

# Creatinine is expected in mmol/l (generally < 50). Values in the 100-400 range
# suggest mg/dl and would need converting.
summary(df$u_creatinine)


### FINAL INSPECTION
# Eyeball the recoded table, then scan the summary for leftover abnormal values
# (convert them to NA).
View(df)
summary(df)


### SAVE FINAL DATASET
# df should still hold every column created in the extraction step and have as
# many rows as `data`.
setwd("~/Desktop/Artículos/STEPS/Extracted")   #modify according to your PC
write.csv(df, file = "Turkmenistan_2018.csv", row.names = FALSE)

# Drop both tables so the next survey starts from a clean workspace.
rm(list = c("data", "df"))


##zambia

setwd("~/Desktop/Artículos/STEPS/STEPS microdata/Zambia") #modify according to your PC and the STEPS 


### READ DATA FILE
data <- read.csv("zmb2017.csv") #modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extraction table from the raw survey columns.
# with() replaces attach()/detach(): names are resolved against the columns of
# `data` before any same-named object in the workspace, and nothing stays on the
# search path if data.frame() stops with an error (e.g. a column is absent).
df <- with(data, data.frame(
  study_id = "ZMB_2017_STEPS_v01",  #replace with STEPS id
  country = "Zambia",   #replace with full name of the country (first letter capital, e.g., Peru)
  region  = "Africa",   #replace with full name of the WHO region (first letter capital)
  data_year = 2017,   #replace with year of data collection (if two, use the latest: 2016-2017 -> 2017)
  coverage = "National",  #replace with "National" or "Subnational"

  # survey design variables
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  wstep2  = wstep2,
  wstep3  = wstep3,

  survey_min_age = 18,   #replace with the youngest age target
  survey_max_age = 69,   #replace with the oldest age target

  participant_id = pid,   #if not available use 1:nrow(data)
  sex            = sex,   #replace with sex variable
  age            = age,   #replace with age variable (years)
  # NOTE(review): residence is taken from the sampling stratum, and the recode
  # further down treats stratum code 2 as urban - confirm against the codebook.
  is_urban       = stratum,
  is_pregnant = m8,

  weight = m12,   #replace with weight variable in kg
  height = m11,   #replace with height variable in cm
  waist = m14,

  # three systolic / diastolic readings
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8,

  u_creatinine = b15,  #replace with spot urinary creatinine variable in mmol/l
  u_sodium = b14  #replace with spot urinary sodium variable in mmol/l
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: rows labelled "Men" become 1 and every
# other non-missing label becomes 2, so check the raw labels first.
# Height is divided by 100 (cm -> m); verify the raw unit before running.
df <- df %>%
  mutate(
    sex    = ifelse(sex == "Men", 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Any cell of df equal to "Inf" is treated as missing.
df[df == "Inf"] <- NA #inf means missing
# is_pregnant: 1 = code 1, 0 = any other recorded code; men are always 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

# Residence: code 2 becomes 1 (urban), any other recorded code 0.
table(df$is_urban)
df <- df %>% mutate(is_urban = ifelse(is_urban == 2, 1, 0))

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive readings and 888/999-style placeholders become NA. Height is in
# metres by now, so its placeholders (8.88, 9.99) fall under the >= 5 cut-off.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


##BP
# Look at the raw codes before recoding.
table(df$bp_measured)
table(df$self_hyper)

# Both answers are collapsed to 1 = code 1, 0 = any other recorded code (NA stays
# NA). A participant whose BP was never measured is then set to 0 for self_hyper.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

# Treatment is recoded the same way and forced to 0 when self_hyper is 0.
table(df$drug_hyper)
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more in any of the six BP columns are placeholders -> NA.
df <- df %>%
  mutate(across(c(sbp1, sbp2, sbp3, dbp1, dbp2, dbp3), ~ ifelse(.x >= 888, NA, .x)))

# Glucose / diabetes: same 0/1 recode and the same gating chain as for BP
# (never measured -> self_diabetes 0; self_diabetes 0 -> drug_diabetes 0).
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(drug_diabetes = ifelse(self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)))

##URINARY VARIABLES

# Creatinine is expected in mmol/l (generally < 50). Values in the 100-400 range
# suggest mg/dl; in that case enable the conversion kept below.
summary(df$u_creatinine)


#df$u_creatinine<-df$u_creatinine*0.08842



### FINAL INSPECTION
# Eyeball the recoded table, then scan the summary for leftover abnormal values
# (convert them to NA).
View(df)
summary(df)
# The value 777 in either urinary column is treated as missing.
is.na(df$u_creatinine) <- which(df$u_creatinine == 777)
is.na(df$u_sodium) <- which(df$u_sodium == 777)


### SAVE FINAL DATASET
# df should still hold every column created in the extraction step and have as
# many rows as `data`.
setwd("~/Desktop/Artículos/STEPS/Extracted")   #modify according to your PC
write.csv(df, file = "Zambia_2017.csv", row.names = FALSE)

# Drop both tables so the next survey starts from a clean workspace.
rm(list = c("data", "df"))



## BHUTAN 2019
setwd("~/Downloads") #modify according to your PC and the STEPS 


### READ DATA FILE
data <- read.csv("btn2019.csv") #modify according to the STEPS file name


### EXTRACT DATA
# Build the harmonised extraction table from the raw survey columns.
# with() replaces attach()/detach(): names are resolved against the columns of
# `data` before any same-named object in the workspace, and nothing stays on the
# search path if data.frame() stops with an error (e.g. a column is absent).
df <- with(data, data.frame(
  study_id = "BTN_2019_STEPS_v01",  #replace with STEPS id
  country = "Bhutan",   #replace with full name of the country (first letter capital, e.g., Peru)
  # NOTE(review): WHO spells this region "South-East Asia"; keep the label in
  # line with whatever the other countries of the region use in this file.
  region  = "Southeast Asia",   #replace with full name of the WHO region (first letter capital)
  data_year = 2019,   #replace with year of data collection (if two, use the latest: 2016-2017 -> 2017)
  coverage = "National",  #replace with "National" or "Subnational"

  # survey design variables
  psu     = psu,
  stratum = stratum,
  wstep1  = wstep1,
  # NOTE(review): steps 2 and 3 reuse the step-1 weight - confirm that this
  # survey really ships a single weight variable.
  wstep2  = wstep1,
  wstep3  = wstep1,

  survey_min_age = 15,   #replace with the youngest age target
  survey_max_age = 69,   #replace with the oldest age target

  participant_id = pid,   #if not available use 1:nrow(data)
  sex            = sex,   #replace with sex variable
  age            = age,   #replace with age variable (years)
  is_urban       = location,   #replace with urban/rural variable (later change to urban=1, rural=0)
  is_pregnant = m8,

  weight = m12,   #replace with weight variable in kg
  height = m11,   #replace with height variable in cm
  waist = m14,

  # three systolic / diastolic readings
  sbp1 = m4a,
  sbp2 = m5a,
  sbp3 = m6a,
  dbp1 = m4b,
  dbp2 = m5b,
  dbp3 = m6b,
  bp_measured = h1,
  self_hyper = h2a,
  drug_hyper = h3,
  glucose_measured = h6,
  self_diabetes = h7a,
  drug_diabetes = h8,

  u_creatinine = b15,  #replace with spot urinary creatinine variable in mmol/l
  u_sodium = b14  #replace with spot urinary sodium variable in mmol/l
))



### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: code 1 stays 1 and every other
# non-missing code becomes 2, so check the raw codes first.
# Height is divided by 100 (cm -> m); verify the raw unit before running.
df <- df %>%
  mutate(
    sex    = ifelse(sex == 1, 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Any cell of df equal to "Inf" is treated as missing.
df[df == "Inf"] <- NA #inf means missing
# is_pregnant: 1 = code 1, 0 = any other recorded code; men are always 0.
df <- df %>%
  mutate(is_pregnant = ifelse(sex == 1, 0, ifelse(is_pregnant == 1, 1, 0)))
table(df$is_pregnant, df$sex)

# Residence: code 2 becomes 1 (urban), any other recorded code 0.
table(df$is_urban)
df <- df %>% mutate(is_urban = ifelse(is_urban == 2, 1, 0))


### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive readings and 888/999-style placeholders become NA. Height is in
# metres by now, so its placeholders (8.88, 9.99) fall under the >= 5 cut-off.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


##BP
# Look at the raw codes before recoding.
table(df$bp_measured)
table(df$self_hyper)

# Both answers are collapsed to 1 = code 1, 0 = any other recorded code (NA stays
# NA). A participant whose BP was never measured is then set to 0 for self_hyper.
df <- df %>%
  mutate(
    bp_measured = ifelse(bp_measured == 1, 1, 0),
    self_hyper  = ifelse(bp_measured == 0, 0, ifelse(self_hyper == 1, 1, 0))
  )
table(df$self_hyper)
summary(df$self_hyper)

# Treatment is recoded the same way and forced to 0 when self_hyper is 0.
table(df$drug_hyper)
df <- df %>%
  mutate(drug_hyper = ifelse(self_hyper == 0, 0, ifelse(drug_hyper == 1, 1, 0)))

# Readings of 888 or more in any of the six BP columns are placeholders -> NA.
df <- df %>%
  mutate(across(c(sbp1, sbp2, sbp3, dbp1, dbp2, dbp3), ~ ifelse(.x >= 888, NA, .x)))


# Glucose / diabetes: same 0/1 recode and the same gating chain as for BP
# (never measured -> self_diabetes 0; self_diabetes 0 -> drug_diabetes 0).
table(df$glucose_measured, df$self_diabetes, useNA = "always")
df <- df %>%
  mutate(
    glucose_measured = ifelse(glucose_measured == 1, 1, 0),
    self_diabetes    = ifelse(glucose_measured == 0, 0, ifelse(self_diabetes == 1, 1, 0))
  )
table(df$drug_diabetes, useNA = "always")
df <- df %>%
  mutate(drug_diabetes = ifelse(self_diabetes == 0, 0, ifelse(drug_diabetes == 1, 1, 0)))

##URINARY VARIABLES

# Creatinine is expected in mmol/l (generally < 50). Values in the 100-400 range
# suggest mg/dl; the conversion factor is applied for this survey.
summary(df$u_creatinine)

df <- df %>% mutate(u_creatinine = u_creatinine * 0.08842)


### FINAL INSPECTION
# Eyeball the recoded table, then scan the summary for leftover abnormal values
# (convert them to NA).
View(df)
summary(df)


### SAVE FINAL DATASET
# df should still hold every column created in the extraction step and have as
# many rows as `data`.
setwd("~/Desktop/Artículos/STEPS/Extracted")   #modify according to your PC
write.csv(df, file = "Bhutan_2019.csv", row.names = FALSE)

# Drop both tables so the next survey starts from a clean workspace.
rm(list = c("data", "df"))






## CHILE 2017
setwd("~/Desktop/DHS/Data/Chile/ENS 2016-2017/Downloaded")



### READ DATA FILE
# read.spss() belongs to the foreign package; calling it through its namespace
# keeps this section working even when library(foreign) was not run earlier.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
data <- foreign::read.spss(
  "Base de datos Encuesta Nacional de Salud 2016-2017(ENS).Formulario 1_2_EX.MINSAL_EPI. (2).sav",
  use.value.labels = FALSE,
  to.data.frame = TRUE
)



## Hypertension
# h2 (exported below as self_hyper): codes 1 and 2 -> 1, code 3 -> 0, else NA.
data$h2 <- ifelse(data$h2 %in% c(1, 2), 1,
                  ifelse(data$h2 == 3, 0, NA))
# h6 is forced to 0 where h5 == 2; then code 2 -> 0 and code 3 -> 1.
data$h6[which(data$h5 == 2)] <- 0
data$h6[which(data$h6 == 2)] <- 0
data$h6[which(data$h6 == 3)] <- 1
# drug_hyper keeps only the 0/1 values of the recoded h6.
data$drug_hyper <- ifelse(data$h6 == 1, 1,
                          ifelse(data$h6 == 0, 0, NA))


## Diabetes
# di3: code 3 -> NA, code 2 -> 0 (code 1 is left as 1).
data$di3[which(data$di3 == 3)] <- NA
data$di3[which(data$di3 == 2)] <- 0
# di7_1 / di7_2 are forced to 0 where di6 == 2; di7_2 code 2 counts as 1.
data$di7_1[which(data$di6 == 2)] <- 0
data$di7_2[which(data$di6 == 2)] <- 0
data$di7_2[which(data$di7_2 == 2)] <- 1
# A missing di7_* answer is filled with 0 when di3 is 0.
data$di7_1   <- ifelse(is.na(data$di7_1) & data$di3 == 0, 0, data$di7_1)
data$di7_2   <- ifelse(is.na(data$di7_2) & data$di3 == 0, 0, data$di7_2)
# prev_db (exported below as self_diabetes): 1 if any of the three items is 1,
# 0 only if all three are 0, NA otherwise.
data$prev_db <- ifelse(data$di3 == 1 | data$di7_1 == 1 | data$di7_2 == 1, 1,
                       ifelse(data$di3 == 0 & data$di7_1 == 0 & data$di7_2 == 0, 0, NA))


# diab_fam: af1b code 1 -> 1, code 2 -> 0, else NA.
data$diab_fam <- ifelse(data$af1b == 1, 1,
                        ifelse(data$af1b == 2, 0, NA))


# prev_db_date: Edad minus di3a, binned as 0-1 -> 1, 2 -> 2, 3 -> 3, 4 -> 4,
# 5 or more -> 5 (negative differences become NA); 0 where di3 is 0.
data$prev_db_date <- data$Edad - data$di3a
data$prev_db_date <- ifelse(data$prev_db_date %in% c(0, 1), 1,
                            ifelse(data$prev_db_date == 2, 2,
                                   ifelse(data$prev_db_date == 3, 3,
                                          ifelse(data$prev_db_date == 4, 4,
                                                 ifelse(data$prev_db_date >= 5, 5, NA)))))
data$prev_db_date[which(data$di3 == 0)] <- 0


# drug_chol: dis6 codes 1 and 3 -> 1, code 2 -> 0, else NA; then forced to 0 for
# the listed dis2 / dis4 / dis5 codes (dis4 includes the -9999 / -8888 codes).
data$drug_chol <- ifelse(data$dis6 %in% c(1,3), 1,
                         ifelse(data$dis6 == 2, 0, NA))
data$drug_chol[which(data$dis2 %in% c(3, 4))] <- 0
data$drug_chol[which(data$dis4 %in% c(-9999, -8888, 2, 3))] <- 0
data$drug_chol[which(data$dis5 %in% c(2))] <- 0


# smoker: ta3 codes 1 and 2 -> 1, codes 3 and 4 -> 0, else NA.
data$smoker <- ifelse(data$ta3 %in% c(1, 2), 1,
                      ifelse(data$ta3 %in% c(3, 4), 0, NA))


# all_db: 1 when Glucosa >= 126 or prev_db is 1; 0 when neither holds and at
# least one of the two is known; NA when both are missing. Two further
# assignments to 1 in the original were dropped: the first two lines already
# cover every row they could match, so they never changed anything.
data$all_db <- NA
data$all_db[which(is.na(data$all_db) & data$Glucosa >= 126)] <- 1
data$all_db[which(is.na(data$all_db) & data$prev_db == 1)]  <- 1
data$all_db[which(is.na(data$all_db) & data$prev_db == 0 & data$Glucosa < 126)]   <- 0
data$all_db[which(is.na(data$all_db) & data$Glucosa < 126 & is.na(data$prev_db))] <- 0
data$all_db[which(is.na(data$all_db) & is.na(data$Glucosa) & data$prev_db == 0)]  <- 0


# gluc_test_ever: di1 code 1 -> 1, codes 2 and 3 -> 0, else NA.
table(data$di1, useNA = "always")
data$gluc_test_ever <- ifelse(data$di1 %in% c(1), 1,
                              ifelse(data$di1 %in% c(2,3), 0, NA))
table(data$di1, data$gluc_test_ever, useNA = "always")


# feet: di10 codes 1 and 2 -> 1, codes 3 to 5 -> 0, else NA.
table(data$di10, useNA = "always")
table(data$di10, data$di3, useNA = "always")
data$feet <- ifelse(data$di10 %in% c(1,2), 1,
                    ifelse(data$di10 %in% c(3,4,5), 0, NA))
#data[which(is.na(data$feet) & data$di3 %in% (0)), ]$feet <- 0
table(data$feet, data$di3, useNA = "always")
table(data$feet, data$prev_db, useNA = "always")
table(data$feet, useNA = "always")


# eyes: di11 codes 1 and 2 -> 1, codes 3 to 5 -> 0, else NA.
table(data$di11, useNA = "always")
table(data$di11, data$di3, useNA = "always")
data$eyes <- ifelse(data$di11 %in% c(1,2), 1,
                    ifelse(data$di11 %in% c(3,4,5), 0, NA))
#data[which(is.na(data$eyes) & data$di3 %in% ("No")), ]$eyes <- 0
table(data$eyes, data$di3, useNA = "always")
table(data$eyes, data$prev_db, useNA = "always")
table(data$eyes, useNA = "always")



### EXTRACT DATA
# Build the harmonised extraction table from the raw and derived columns.
# with() replaces attach()/detach(): names are resolved against the columns of
# `data` before any same-named object in the workspace, and nothing stays on the
# search path if data.frame() stops with an error (e.g. a column is absent).
df <- with(data, data.frame(
  study_id = "CHL_2017_ENS",  #replace with STEPS id
  country = "Chile",   #replace with full name of the country (first letter capital, e.g., Peru)
  region  = "Americas",   #replace with full name of the WHO region (first letter capital)
  data_year = 2017,   #replace with year of data collection (if two, use the latest: 2016-2017 -> 2017)
  coverage = "National",  #replace with "National" or "Subnational"

  # NOTE(review): psu is the constant 1 for every row - confirm that the survey
  # has no cluster identifier that should be used instead.
  psu     = 1,
  stratum = Estrato,
  wstep1  = Fexp_F1p_Corr,
  wstep2  = Fexp_F1F2p_Corr, #weight for the physical examination
  wstep3  = Fexp_EX2p_Corr,  #weight for urinary creatinine and sodium

  survey_min_age = 15,   #replace with the youngest age target
  survey_max_age = NA,   #replace with the oldest age target

  participant_id = IdPersona_1,   #if not available use 1:nrow(data)
  sex            = Sexo,   #replace with sex variable
  age            = Edad,   #replace with age variable (years)
  is_urban       = NA,   #not extracted for this survey; the column is all NA
  is_pregnant = NA,      #not extracted for this survey; the column is all NA

  weight = m4p1,
  height = m4p2,
  waist  = m4p3,
  # three systolic / diastolic readings
  sbp1 = m2p8_1, sbp2 = m2p9_1, sbp3 = m2p10_1,
  dbp1 = m2p8_2, dbp2 = m2p9_2, dbp3 = m2p10_2,
  bp_measured = NA,
  self_hyper = h2,            #h2 as recoded to 0/1 before this step
  drug_hyper = drug_hyper,    #derived before this step
  glucose_measured = NA,
  self_diabetes = prev_db,    #derived before this step
  drug_diabetes = NA,

  u_creatinine = Creatinina_en_Orina,  #replace with spot urinary creatinine variable in mmol/l
  u_sodium = Sodio_Na_en_Orina  #replace with spot urinary sodium variable in mmol/l
))

### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: code 1 stays 1 and every other
# non-missing code becomes 2, so check the raw codes first.
# Height is divided by 100 (cm -> m); verify the raw unit before running.
df <- df %>%
  mutate(
    sex    = ifelse(sex == 1, 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Any cell of df equal to "Inf" is treated as missing.
df[df == "Inf"] <- NA #inf means missing

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive readings and 888/999-style placeholders become NA. Height is in
# metres by now, so its placeholders (8.88, 9.99) fall under the >= 5 cut-off.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )


##BP

# Readings of 888 or more in any of the six BP columns are placeholders -> NA.
# NOTE(review): this file handles -8888 / -9999 codes for some questionnaire
# items of this survey; negative values are not filtered here - confirm that
# none occur in the BP columns.
df <- df %>%
  mutate(across(c(sbp1, sbp2, sbp3, dbp1, dbp2, dbp3), ~ ifelse(.x >= 888, NA, .x)))

##URINARY VARIABLES

# Creatinine is expected in mmol/l (generally < 50). Values in the 100-400 range
# suggest mg/dl; the conversion factor is applied for this survey.
summary(df$u_creatinine)

df <- df %>% mutate(u_creatinine = u_creatinine * 0.08842)


### FINAL INSPECTION
# Eyeball the recoded table, then scan the summary for leftover abnormal values
# (convert them to NA).
View(df)
summary(df)


### SAVE FINAL DATASET
# df should still hold every column created in the extraction step and have as
# many rows as `data`.
setwd("~/Desktop/Artículos/STEPS/Data/Extracted")   #modify according to your PC
write.csv(df, file = "Chile_2017.csv", row.names = FALSE)

# Drop both tables so the next survey starts from a clean workspace.
rm(list = c("data", "df"))




# BRAZIL 2013
setwd("~/Desktop/DHS/Data/Brazil/PNS 2013/Downloaded")
data  <- read.csv("PESPNS2013.csv")
data2 <- read.csv("Dados_dos_Exames_Laboratoriais.csv")
# Left-join the laboratory file on every column name the two files share, so all
# rows of the main file are kept.
data  <- merge(data, data2, by = names(data)[names(data) %in% names(data2)], all.x = TRUE)
rm(data2)


## Hypertension
# Q006 -> drug_hyper: code 2 -> 0; missing answers are set to 0 when Q002 == 3
# or Q001 == 6.
data$Q006[which(data$Q006 == 2)] <- 0
data$Q006[which(is.na(data$Q006) & data$Q002 == 3)] <- 0
data$Q006[which(is.na(data$Q006) & data$Q001 == 6)] <- 0
data$drug_hyper <- data$Q006
# Q002 (exported below as self_hyper): a missing answer with Q001 == 6 counts as
# code 3; then code 1 -> 1, code 3 -> 0, anything else NA.
data$Q002[which(is.na(data$Q002) & data$Q001 == 6)] <- 3
data$Q002 <- ifelse(data$Q002 == 1, 1,
                    ifelse(data$Q002 == 3, 0, NA))


## Diabetes
# Q030: code 2 -> NA, code 3 -> 0; a missing answer with Q029 == 6 counts as 0.
data$Q030[which(data$Q030 == 2)] <- NA
data$Q030[which(data$Q030 == 3)] <- 0
data$Q030[which(is.na(data$Q030) & data$Q029 == 6)] <- 0
# Q03401 / Q03402: code 2 -> 0; a missing answer is set to 0 when Q030 is 0.
data$Q03401[which(data$Q03401 == 2)] <- 0
data$Q03402[which(data$Q03402 == 2)] <- 0
data$Q03401[which(is.na(data$Q03401) & data$Q030 == 0)] <- 0
data$Q03402[which(is.na(data$Q03402) & data$Q030 == 0)] <- 0
# prev_db (exported below as self_diabetes): 1 if any of the three items is 1,
# 0 only if all three are 0, NA otherwise.
data$prev_db <- ifelse(data$Q030 == 1 | data$Q03401 == 1 | data$Q03402 == 1, 1,
                       ifelse(data$Q030 == 0 & data$Q03401 == 0 & data$Q03402 == 0, 0, NA))


# prev_db_date: C008 minus Q031, binned as 0-1 -> 1, 2 -> 2, 3 -> 3, 4 -> 4,
# 5 or more -> 5 (negative differences become NA); 0 where Q030 is 0.
data$prev_db_date <- data$C008 - data$Q031
data$prev_db_date <- ifelse(data$prev_db_date %in% c(0, 1), 1,
                            ifelse(data$prev_db_date == 2, 2,
                                   ifelse(data$prev_db_date == 3, 3,
                                          ifelse(data$prev_db_date == 4, 4,
                                                 ifelse(data$prev_db_date >= 5, 5, NA)))))
data$prev_db_date[which(data$Q030 == 0)] <- 0


# Q06204 -> drug_chol: code 2 -> 0; missing answers are set to 0 when Q060 == 2
# or Q059 == 6.
data$Q06204[which(data$Q06204 == 2)] <- 0
data$Q06204[which(is.na(data$Q06204) & data$Q060 == 2)] <- 0
data$Q06204[which(is.na(data$Q06204) & data$Q059 == 6)] <- 0
data$drug_chol <- data$Q06204


# smoker: P050 codes 1 and 2 -> 1, code 3 -> 0.
data$smoker <- data$P050
data$smoker[which(data$smoker %in% c(1,2))] <- 1
data$smoker[which(data$smoker == 3)] <- 0


# all_db: 1 when Z034 >= 6.5 or prev_db is 1; 0 when neither holds and at least
# one of the two is known; NA when both are missing. Two further assignments to
# 1 in the original were dropped: the first two lines already cover every row
# they could match, so they never changed anything.
data$all_db <- NA
data$all_db[which(is.na(data$all_db) & data$Z034 >= 6.5)] <- 1
data$all_db[which(is.na(data$all_db) & data$prev_db == 1)]  <- 1
data$all_db[which(is.na(data$all_db) & data$prev_db == 0 & data$Z034 < 6.5)]   <- 0
data$all_db[which(is.na(data$all_db) & data$Z034 < 6.5 & is.na(data$prev_db))] <- 0
data$all_db[which(is.na(data$all_db) & is.na(data$Z034) & data$prev_db == 0)]  <- 0


# gluc_test_ever: Q029 codes 1 to 5 -> 1, code 6 -> 0, else NA.
table(data$Q029, useNA = "always")
data$gluc_test_ever <- ifelse(data$Q029 %in% c(1,2,3,4,5), 1,
                              ifelse(data$Q029 %in% c(6), 0, NA))
table(data$Q029, data$gluc_test_ever, useNA = "always")


# feet: Q054 codes 1 and 2 -> 1, codes 3 to 6 -> 0, else NA; a missing value is
# set to 0 when Q030 is 0. The column is indexed directly: the former
# data[rows, ]$feet <- 0 form stops with an error when no row matches.
table(data$Q054, useNA = "always")
table(data$Q054, data$Q030, useNA = "always")
data$feet <- ifelse(data$Q054 %in% c(1,2), 1,
                    ifelse(data$Q054 %in% c(3,4,5,6), 0, NA))
data$feet[which(is.na(data$feet) & data$Q030 %in% 0)] <- 0
table(data$feet, data$Q054, useNA = "always")
table(data$feet, data$Q030, useNA = "always")
table(data$feet, data$prev_db, useNA = "always")
table(data$feet, useNA = "always")


# eyes: Q053 codes 1 to 3 -> 1, codes 4 to 6 -> 0, else NA; same fill-in rule and
# the same direct column indexing as for feet.
table(data$Q053, useNA = "always")
table(data$Q053, data$Q030, useNA = "always")
data$eyes <- ifelse(data$Q053 %in% c(1,2,3), 1,
                    ifelse(data$Q053 %in% c(4,5,6), 0, NA))
data$eyes[which(is.na(data$eyes) & data$Q030 %in% 0)] <- 0
table(data$eyes, data$Q053, useNA = "always")
table(data$eyes, data$Q030, useNA = "always")
table(data$eyes, data$prev_db, useNA = "always")
table(data$eyes, useNA = "always")



### EXTRACT DATA
# Build the harmonised extraction table from the raw and derived columns.
# with() replaces attach()/detach(): names are resolved against the columns of
# `data` before any same-named object in the workspace, and nothing stays on the
# search path if data.frame() stops with an error (e.g. a column is absent).
df <- with(data, data.frame(
  study_id = "BRA_2013_PNS",  #replace with STEPS id
  # English spelling, in line with the other country labels in this file.
  country = "Brazil",   #replace with full name of the country (first letter capital, e.g., Peru)
  region  = "Americas",   #replace with full name of the WHO region (first letter capital)
  data_year = 2013,   #replace with year of data collection (if two, use the latest: 2016-2017 -> 2017)
  coverage = "National",  #replace with "National" or "Subnational"

  psu     = UPA_PNS,
  stratum = V0024,
  # The same weight is used for all three steps.
  wstep1  = V00291,
  wstep2  = V00291, #Weight of selected resident with correction for non-interview with calibration by population projection for selected resident - used in the calculation of selected resident indicators
  wstep3  = V00291,

  survey_min_age = 18,   #replace with the youngest age target
  survey_max_age = NA,   #replace with the oldest age target

  # No participant identifier is extracted, so rows are numbered 1..nrow(data);
  # seq_len() also behaves correctly for an empty table.
  participant_id = seq_len(nrow(data)),
  sex            = C006,   #replace with sex variable
  age            = C008,   #replace with age variable (years)
  is_urban       = NA,   #not extracted for this survey; the column is all NA
  is_pregnant = NA,      #not extracted for this survey; the column is all NA

  weight = W00103,
  height = W00201,
  waist  = W00303,
  # three systolic / diastolic readings
  sbp1 = W00401, sbp2 = W00403, sbp3 = W00405,
  dbp1 = W00402, dbp2 = W00404, dbp3 = W00406,
  bp_measured = NA,
  self_hyper = Q002,          #Q002 as recoded to 0/1 before this step
  drug_hyper = drug_hyper,    #derived before this step
  glucose_measured = NA,
  self_diabetes = prev_db,    #derived before this step
  drug_diabetes = NA,

  u_creatinine = Z048,  #replace with spot urinary creatinine variable in mmol/l
  u_sodium = Z046,  #replace with spot urinary sodium variable in mmol/l
  salt = Z049   #extra column that the other extractions do not have
))

### CHECK AND RECODE

## DEMOGRAPHIC VARIABLES
# Target coding is men = 1, women = 2: code 1 stays 1 and every other
# non-missing code becomes 2, so check the raw codes first.
# Height is divided by 100 (cm -> m); verify the raw unit before running.
df <- df %>%
  mutate(
    sex    = ifelse(sex == 1, 1, 2),
    height = height / 100
  )

table(df$is_pregnant)
# Any cell of df equal to "Inf" is treated as missing.
df[df == "Inf"] <- NA #inf means missing

### ANTHROPOMETRIC VARIABLES
summary(df)

# Non-positive readings and 888/999-style placeholders become NA. Height is in
# metres by now, so its placeholders (8.88, 9.99) fall under the >= 5 cut-off.
df <- df %>%
  mutate(
    waist  = ifelse(waist <= 0 | waist >= 888, NA, waist),
    height = ifelse(height <= 0 | height >= 5, NA, height),
    weight = ifelse(weight <= 0 | weight >= 888, NA, weight)
  )

##BP

# Readings of 888 or more in any of the six BP columns are placeholders -> NA.
df <- df %>%
  mutate(across(c(sbp1, sbp2, sbp3, dbp1, dbp2, dbp3), ~ ifelse(.x >= 888, NA, .x)))


########
# Spot-urine sodium estimate (the column name refers to Tanaka). The creatinine
# term rescales u_creatinine by 10 / 0.08842, and the predicted term combines
# height (multiplied back by 100), weight and age. The result is divided by 17.1
# - presumably mmol of sodium to grams of salt; confirm before reporting units.
df$estimated_sodium_excretion_tanaka <- with(df, {
  creatinine_term <- u_creatinine * 10 / 0.08842
  predicted_term  <- 16.14 * (height * 100) + 14.89 * weight - 2.04 * age - 2244.45
  21.98 * ((u_sodium / creatinine_term) * predicted_term)^0.392
}) / 17.1

# Quick visual check of the estimate against age.
ggplot(df, mapping = aes(x = estimated_sodium_excretion_tanaka, y = age)) +
  geom_point()

